home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Atari Compendium
/
The Atari Compendium (Toad Computers) (1994).iso
/
files
/
umich
/
utils
/
nroff~06.zoo
/
hyph.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-07-17
|
38KB
|
1,664 lines
/* RCS identification string for this source file. */
static char *rcsid_hyph_c="$Id: hyph.c,v 1.2 1992/07/16 21:42:50 rosenkra Exp $";
/*
* $Log: hyph.c,v $
* Revision 1.2 1992/07/16 21:42:50 rosenkra
* new at this rev. ported to gcc.
*
*/
/*
 * Build switches.
 *
 * Both symbols are unconditionally #undef'd here, so a definition made
 * earlier (for example on the compiler command line) is discarded. To
 * enable either one, change the corresponding #undef into a #define.
 *
 * DEBUG_HYPHEN compiles in the HyDebug flag and the bprint() declaration
 * further down. Defining STANDALONE forces DEBUG_HYPHEN on as well.
 */
#undef DEBUG_HYPHEN
#undef STANDALONE
#ifdef STANDALONE
# ifndef DEBUG_HYPHEN
# define DEBUG_HYPHEN
# endif
#endif
/*
* hyph - An implementation of the Knuth hyphenation algorithm, modified
*
* currently, hyphen ret 0 if cannot be hyphenated, 1 for it was (or is)
* soft hyphenated. should modify so that ret -1 if soft hyphenated or
* n (positive) where n is index into string where there is a hard hyphen.
* either that or check for a hard hyphen before even calling this routine
*
* two entry points here:
*
* char *do_hyphen (char *, int, char **, char **);
* int hyphen (char *, char *);
*/
#include <stdio.h>
#include <ctype.h>
/*
 * Compatibility shim: when ALCYON is defined and `void' is not already a
 * macro, `void' is replaced by `int' for the rest of the file (presumably
 * because that compiler has no void type -- TODO confirm).
 */
#ifdef ALCYON
# ifndef void
# define void int
# endif
#endif
/*
 * #defines for non-printing ASCII characters
 *
 * Each comment gives the control-key notation followed by the standard
 * ASCII name. NL and CPMEOF are aliases for LF and SUB respectively.
 */
#define NUL 0x00 /* ^@ null */
#define SOH 0x01 /* ^A start of heading */
#define STX 0x02 /* ^B start of text */
#define ETX 0x03 /* ^C end of text */
#define EOT 0x04 /* ^D end of transmission */
#define ENQ 0x05 /* ^E enquiry */
#define ACK 0x06 /* ^F acknowledge */
#define BEL 0x07 /* ^G bell */
#define BS 0x08 /* ^H backspace */
#define HT 0x09 /* ^I horizontal tab */
#define LF 0x0a /* ^J line feed */
#define NL LF
#define VT 0x0b /* ^K vertical tab */
#define FF 0x0c /* ^L form feed */
#define CR 0x0d /* ^M carriage return */
#define SO 0x0e /* ^N shift out */
#define SI 0x0f /* ^O shift in */
#define DLE 0x10 /* ^P data link escape */
#define DC1 0x11 /* ^Q device control 1 */
#define DC2 0x12 /* ^R device control 2 */
#define DC3 0x13 /* ^S device control 3 */
#define DC4 0x14 /* ^T device control 4 */
#define NAK 0x15 /* ^U negative acknowledge */
#define SYN 0x16 /* ^V synchronous idle */
#define ETB 0x17 /* ^W end of transmission block */
#define CAN 0x18 /* ^X cancel */
#define EM 0x19 /* ^Y end of medium */
#define SUB 0x1a /* ^Z substitute */
#define CPMEOF SUB
#define ESC 0x1b /* ^[ escape */
#define FS 0x1c /* ^\ file separator */
#define GS 0x1d /* ^] group separator */
#define RS 0x1e /* ^^ record separator */
#define US 0x1f /* ^_ unit separator */
#define DEL 0x7f /* DEL delete */
/*
* If THING is #defined then words like "anything" will be split
* any-thing. On the other hand, if it's defined then "bathing" is
* split ba-thing. You can't have every-thing. In the present
* version most of the -thing words are in the exception list.
*
* #define THING
*/
/*
 * Various pseudo-subroutines.
 *
 * HYPHEN         bit set in a character when a hyphen may be inserted
 *                in front of it
 * HYPHENATE(c)   sets the bit in the lvalue c
 * UNHYPHENATE(c) clears the bit in the lvalue c
 * HAS_HYPHEN(c)  non-zero if the bit is set in c
 * ER(p,end)      true if p points at an 'e' that is followed by an 'r'
 *                and that 'r' is the character `end' points at, i.e. the
 *                word ends in "er"; used by the consonant pair checking
 *                routine
 * isvowel(c)     true if c (ignoring the hyphen bit) is a lower-case
 *                letter flagged in the vt[] table
 * isconsonant(c) true if c is non-zero and not a vowel
 *
 * Note that a letter is marked if a hyphen can be inserted in front
 * of it.
 *
 * ER, isvowel and isconsonant evaluate their arguments more than once,
 * so do not pass expressions with side effects. Every argument of ER is
 * parenthesized so that pointer expressions such as ER(w + 3, last)
 * expand correctly.
 */
#define HYPHEN 0x80 /* soft hyphen bit */
#define HYPHENATE(c) ((c) |= HYPHEN ) /* mark char */
#define UNHYPHENATE(c) ((c) &= ~HYPHEN ) /* unmark char */
#define HAS_HYPHEN(c) ((c) & HYPHEN ) /* is it marked? */
#define ER(p,end) ((*(p) & 0x7f)=='e' && (*((p)+1) & 0x7f)=='r' && ((p)+1)==(end))
#define isvowel(c) (islower((c) & 0x7f) && vt[((c) & 0x7f) - 'a'])
#define isconsonant(c) ((c) && !isvowel(c))
/*
 * The two-letter combinations (digraphs) ch, gh, ph, sh and th are
 * treated as single consonants. The subroutine nextch() will map these
 * two characters into a single character as follows.
 *
 * The codes are the five values following 'z'; in ASCII that is
 * 0x7b..0x7f, as shown in the comments (character, hex, octal). They
 * therefore fit in 7 bits and do not collide with the 0x80 HYPHEN bit,
 * and since they are not lower-case letters isvowel() is false for them.
 * Note that TH has the same value as DEL.
 */
#define CH ('z'+1) /* { 0x7b \173 */
#define GH ('z'+2) /* | 0x7c \174 */
#define PH ('z'+3) /* } 0x7d \175 */
#define SH ('z'+4) /* ~ 0x7e \176 */
#define TH ('z'+5) /* DEL 0x7f \177 */
/*
 * global variables (all file-local)
 */
#ifdef DEBUG_HYPHEN
static int HyDebug = 0; /* True if debug diagnostics are to be printed*/
static void bprint ();
#endif
static char **States; /* Ptr to table for current state machine*/
/*
 * Vowel table used by isvowel(): indexed by (letter - 'a'), non-zero for
 * a, e, i, o, u and y. Note that 'y' is counted as a vowel.
 */
static char vt[] =
{ 1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0 };
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
/*
 * functions
 *
 * Old-style (K&R) forward declarations: return types only, no parameter
 * lists, so the compiler does not check arguments at the call sites.
 * According to the file header the two global entry points take
 *
 *	char *do_hyphen (char *, int, char **, char **);
 *	int hyphen (char *, char *);
 *
 * Everything declared static below is private to this file.
 *
 * NOTE(review): the private bsearch() has the same name as the standard
 * C library function declared in <stdlib.h>; including that header in
 * this file would produce a conflicting declaration.
 */
char *do_hyphen (); /* global entries */
int hyphen ();
static char *suffix ();
static char *prefix ();
static char nextch ();
static int isweird ();
static void consonants (); /* these are all local (static)... */
static void phyphen ();
static int next ();
static int exception ();
static int wcmp ();
static char *bsearch ();
/*
 * With gcc the string functions come from <string.h>; other compilers
 * only get an explicit declaration of index().
 */
#ifdef __GNUC__
#include <string.h>
#else
extern char *index (); /* these are external... */
#endif
/* Longest word (in characters, excluding the NUL) do_hyphen() accepts. */
#define MAXWLEN 132
/*------------------------------*/
/* do_hyphen */
/*------------------------------*/
/*
 * hy_split - build the hyphenated result for a single break point.
 *
 * Copies the NUL-terminated (possibly marked) word `src' into `dst',
 * writing a '-' immediately in front of the character `mark' points at;
 * `mark' must point into `src'. The soft-hyphen bit is stripped from
 * every copied character. On return *p1 is the start of dst and *p2 is
 * the first character after the inserted '-'. dst needs room for
 * strlen(src) + 2 bytes. Returns dst.
 */
static char *hy_split (src, mark, dst, p1, p2)
register char *src;
char *mark;
register char *dst;
char **p1;
char **p2;
{
	char *start = dst;

	while (src < mark)
		*dst++ = *src++ & 0x7f;
	*dst++ = '-';
	*p1 = start;
	*p2 = dst;
	while (*src)
		*dst++ = *src++ & 0x7f;
	*dst = '\0';
	return (start);
}

/*
 * do_hyphen - main entry point: split a word at a hyphenation point.
 *
 * ps	word to hyphenate. Must be NUL-terminated and 4..MAXWLEN
 *	characters long; only the low 7 bits of each character are used.
 * n	>= 0	maximum length of the first part, counting the '-'
 *		itself. The first available break is used; if it does
 *		not fit, the word is reported as not hyphenatable.
 *	-1	hyphenate at the first available location
 *	-2	hyphenate at the last available location
 *	-3	hyphenate at all available locations
 *	any other negative value is an error
 * p1	out: -> first part, up to and including the hyphen
 * p2	out: -> last part, after the hyphen (for -3: after the last
 *	hyphen inserted)
 *
 * A word that already contains a hard '-' is split after its first '-'
 * without calling hyphen(); the length limit n still applies.
 *
 * Returns a pointer to the hyphenated word, or a null pointer if the
 * word is missing, too short, too long, cannot be hyphenated, or n is
 * invalid. When a null pointer is returned *p1 and *p2 are not set. For
 * n == -3, if hyphen() succeeds but marks no location, the word is
 * returned unchanged and *p1 / *p2 are left untouched.
 *
 * The result lives in a static buffer, so the caller MUST use the
 * returned pointer (and *p1, *p2) before the next call.
 *
 * use:
 *
 *	char *p1, *p2, *ps;
 *
 *	if (ps = do_hyphen ("hyphenate", 4, &p1, &p2))
 *	{
 *		deal with hyphen:
 *
 *		ps -> "hy-phenate"
 *		p1 -> "hy-..."
 *		p2 -> "phenate"
 *	}
 */
char *do_hyphen (ps, n, p1, p2)
register char *ps; /* word to hyphenate */
int n; /* max length of first part, or -1/-2/-3 */
char **p1; /* -> first part up to and including hyphen */
char **p2; /* -> last part after hyphen */
{
	/*
	 * result. With n == -3 a hyphen may be inserted in front of
	 * several characters, so allow for one '-' per character plus
	 * the terminating NUL.
	 */
	static char buf[2*MAXWLEN+2];
	char ibuf[MAXWLEN+2]; /* work space */
	register char *ebuf; /* -> end of word */
	register char *p;
	register char *pb;
	int len;

	/*
	 * no word? too short? too long?
	 */
	if (ps == (char *) 0)
		return ((char *) 0);
	len = (int) strlen (ps);
	if (len < 4 || len > MAXWLEN)
		return ((char *) 0);

	/*
	 * copy word to input buffer, dropping the high bit
	 */
	for (ebuf = ibuf; *ps; ps++, ebuf++)
		*ebuf = *ps & 0x7f;
	*ebuf = '\0';

	/*
	 * check for hard hyphen. if there is one, split right after it.
	 */
	for (p = ibuf; *p; p++)
	{
		if (*p != '-')
			continue;
		if (n >= 0 && p - ibuf + 1 > n)
			return ((char *) 0);
		strcpy (buf, ibuf);
		*p1 = buf;
		*p2 = buf + (p - ibuf + 1);
		return (buf);
	}

	/*
	 * do it. check if it worked. if not, return null
	 */
	ebuf--; /* -> last char */
	if (!hyphen (ibuf, ebuf))
		return ((char *) 0);

	if (n >= -1)
	{
		/*
		 * first available location. if n was given (>= 0) the
		 * first part, including the '-', must fit in n chars.
		 */
		for (p = ibuf; *p && p <= ebuf; p++)
		{
			if (!HAS_HYPHEN(*p))
				continue;
			if (n >= 0 && p - ibuf + 1 > n)
				return ((char *) 0);
			return (hy_split (ibuf, p, buf, p1, p2));
		}
		return ((char *) 0);
	}

	if (n == -2)
	{
		/*
		 * last available location. scan backward from the end of
		 * the word; the pointer is decremented before use so it
		 * never moves in front of ibuf.
		 */
		p = ibuf + strlen (ibuf);
		while (p > ibuf)
		{
			--p;
			if (HAS_HYPHEN(*p))
				return (hy_split (ibuf, p, buf, p1, p2));
		}
		return ((char *) 0);
	}

	if (n == -3)
	{
		/*
		 * all locations. *p1 stays at the start, *p2 ends up
		 * after the last hyphen inserted.
		 */
		for (p = ibuf, pb = buf; *p && p <= ebuf; p++)
		{
			if (HAS_HYPHEN(*p))
			{
				*pb++ = '-';
				*p1 = buf;
				*p2 = pb;
			}
			*pb++ = *p & 0x7f;
		}
		*pb = '\0';
		return (buf);
	}

	return ((char *) 0); /* error in n */
}
/*------------------------------*/
/* hyphen */
/*------------------------------*/
int hyph